###########################################################################################################################
# GOV2001
# Final Papaer
# Martin Saveski and Abdullah Almaatouq
# May 4, 2015
###########################################################################################################################

setwd("~/Dropbox (MIT)/GOV2001/Product space/extension")

library(Zelig)
library(Hmisc)

###########################################################################################################################

load_data <- function(start_year, end_year, step, one_pair=F) {  
  vars_to_keep <- c("gdp_ppp", "population", "life_expectency", "years_of_education")
  
  df_all <- data.frame()
  country_features <- read.csv("../data/country_features.csv", header = T)
  eci_all_years <- read.csv("../data/ECI data/eci_all_years.csv", header = T)
  
  # fix years of education
  for(year in 1985:2005) {
    years_of_edu <- country_features[, sprintf("years_of_primary_education_%d", year)] + country_features[, sprintf("years_of_secondary_education_%d", year)]
    country_features[, sprintf("years_of_education_%d", year)] <- years_of_edu
  }
  
  for(year in seq(start_year, (end_year - step), by=step)) {
    cat("years:", sprintf("gdp_ppp_%d", year + step), sprintf("gdp_ppp_%d", year), "\n")
    
    country_features_year <- data.frame(country_features$code)
    names(country_features_year) <- c("code")
    
    for (var in vars_to_keep) {
      f_name <- sprintf("%s_%d", var, year)
      country_features_year[, var] <- country_features[, f_name]
    }
    country_features_year$growth <- (country_features[, sprintf("gdp_ppp_%d", year + step)] / country_features[, sprintf("gdp_ppp_%d", year)])
    country_features_year <- country_features_year[complete.cases(country_features_year), ]
    
    # load the ECI scores for this year
    eci_year <- eci_all_years[eci_all_years$year == year, ]
    
    # merge ECI + Country info
    df_year <- merge(country_features_year, eci_year[, c("code", "eci")])
    
    # append to the full matrix
    df_all <- rbind(df_all, df_year)
    
    # don't load all pairs
    if (one_pair == T) {
      break  
    }
  }

  return(df_all)
}

###########################################################################################################################

set.seed(17051989)
years_in_future <- 1:20
means <- c()
sds <- c()
ci_u <- c()
ci_d <- c()

for(step in years_in_future) {
  cat("step: ", step, "\n")

  # load data
  df <- load_data(1985, 2005, step=step, one_pair=F)
  df$growth <- log(df$growth)
  df$gdp_ppp <- log(df$gdp_ppp)
  df$population <- log(df$population)
  
  # run OLS
  z.out1 <- zelig(growth ~ gdp_ppp + population + life_expectency + years_of_education + eci, model = "ls", data = df)
  
  # Make low and high ECI examples
  fun <- median
  x.low  <- setx(z.out1,  eci = quantile(df$eci, 0.2), gdp_ppp=fun(df$gdp_ppp), population=fun(df$population), life_expectency=fun(df$life_expectency), years_of_education=fun(df$years_of_education))
  x.high <- setx(z.out1,  eci = quantile(df$eci, 0.8), gdp_ppp=fun(df$gdp_ppp), population=fun(df$population), life_expectency=fun(df$life_expectency), years_of_education=fun(df$years_of_education))
  
  # do simulation
  s.out1 <- sim(z.out1, x = x.low, x1 = x.high)
  
  # compute 1st differences
  mean.low <- mean(s.out1$sim.out$x$ev[[1]])
  mean.high <- mean(s.out1$sim.out$x1$ev[[1]])
  
  diff.all <- s.out1$sim.out$x1$ev[[1]] - s.out1$sim.out$x$ev[[1]]  
  
  means <- c(means, mean(diff.all))
  sds <- c(sds, sd(diff.all))
  ci_u <- c(ci_u, quantile(diff.all, prob = 0.95))
  ci_d <- c(ci_d, quantile(diff.all, prob = 0.05))
}

# plot with error bars
pdf("figures/future_growth.pdf", width=7, height=6.5)
#errbar(years_in_future, exp(means), exp(means + sds), exp(means - sds), xlab="Time increment [years]", ylab="Expected Growth (in %)", yaxt='n')
#errbar(years_in_future, exp(means), exp(ci_d), exp(ci_u), xlab="Time increment [years]", ylab="Expected Growth (in %)", yaxt='n')
plot(years_in_future, exp(means), xlab="Time increment [years]", ylab="Expected Growth (in %)", yaxt='n', ylim=c(0.97, 1.49))
segments(x0 = years_in_future, y0 = exp(ci_d), x1 = years_in_future, y1 = exp(ci_u))
axis(side=2, at=seq(1, 1.49, by=0.05), labels = seq(0, 45, 5))
dev.off()

# plot with error bands
s <- 0.0
plot(years_in_future, exp(means), type="l", xlab="Year in the Future", ylab="Expected Growth")
y.min <- smooth.spline(years_in_future, exp(means - sds), spar=s)$y
y.max <- smooth.spline(years_in_future, exp(means + sds), spar=s)$y
polygon(c(years_in_future, rev(years_in_future)), c(y.min, rev(y.max)), xpd = F, col = "#ADD8E6", border=NA)
lines(years_in_future, smooth.spline(years_in_future, exp(means), spar=s)$y)

# END